import type { AstroConfig, AstroIntegration } from "astro";
import fs from "fs/promises";
import path from "path";
import { fileURLToPath } from "url";
import { JSDOM } from "jsdom";
import { SimpleMarkdown } from "./simple-markdown";

/**
 * Data extracted from one built HTML page and consumed by the llms*.txt writers.
 */
export interface PageData {
  /** Page pathname as supplied by the build's page list. */
  pathname: string;
  /** First `<h1>` text, else `<title>` text, else last pathname segment, else "Untitled". */
  title: string;
  /** Trimmed `content` of `<meta name="description">`, when the page has one. */
  description?: string;
  /** Markdown rendering of the page's `<main>` (or `<body>`) with the first `<h1>` removed. */
  content?: string;
  /** Set to the same value as `pathname` by the extractor in this file. */
  slug?: string;
  /** Not assigned anywhere in the visible code. */
  order?: number;
  /** Lower-cased primary language code taken from `<html lang>` or the content-language meta tag. */
  lang?: string;
}

/**
 * User-facing options for the integration. Every field is optional; missing
 * values are filled in by `generateSmartDefaults`.
 */
export interface LlmsConfig {
  /** Heading of every generated file. Defaults to the site hostname, or "Documentation" when no site is configured. */
  title?: string;
  /** Blockquote under the heading. Defaults to the package.json description, then a generic sentence. */
  description?: string;
  /**
   * Defaults to `["**\/*"]`.
   * NOTE(review): no code visible in this file reads this value — confirm whether filtering is implemented elsewhere.
   */
  includePatterns?: string[];
  /**
   * Defaults to `["**\/404*", "**\/500*", "**\/api/**"]`.
   * NOTE(review): no code visible in this file reads this value — confirm whether filtering is implemented elsewhere.
   */
  excludePatterns?: string[];
  /** String placed between pages in llms-full.txt. Defaults to "\n\n---\n\n". */
  customSeparator?: string;
  /** When true, per-language files are written in addition to the main files. Defaults to false. */
  i18n?: boolean;
}

// Module-level cache of fully resolved configurations, keyed by the string
// produced by createCacheKey (site URL + user config). Entries are never evicted.
const configurationCache = new Map<string, Required<LlmsConfig>>();

/**
 * Astro integration that writes AI-friendly documentation files into the
 * build output directory once the build has finished.
 *
 * Always produced: `llms.txt`, `llms-small.txt` and `llms-full.txt`, built from
 * every processed page. When `i18n` is enabled, one additional trio per detected
 * language is written as `llms-<lang>.txt`, `llms-small-<lang>.txt` and
 * `llms-full-<lang>.txt`; pages without a detected language are collected under
 * the code `und`.
 *
 * Generation is best-effort: any failure is reported through the Astro logger
 * and does not fail the build.
 *
 * @param userConfig Optional overrides; missing values are filled with defaults.
 * @returns The integration object to register in the Astro config.
 */
export default function astroLLMsGenerator(userConfig: LlmsConfig = {}): AstroIntegration {
  let astroConfiguration: AstroConfig;

  return {
    name: "astro-llms-generate",
    hooks: {
      "astro:config:setup": ({ config }) => {
        // Remember the Astro config; the build:done hook needs `site` from it.
        astroConfiguration = config;
      },

      "astro:build:start": async ({ logger }) => {
        logger.info("Starting LLMs documentation generation...");
      },

      "astro:build:done": async ({ dir, pages, logger }) => {
        const distDirectory = fileURLToPath(dir);

        try {
          const config = await generateSmartDefaults(astroConfiguration, userConfig, distDirectory);
          const pageDataList = await discoverAndProcessPages(pages, distDirectory, astroConfiguration);

          // Main files cover every page regardless of language (backward compatible).
          const filePromises: Promise<void>[] = [
            generateLlmsIndexFile(pageDataList, config, distDirectory, astroConfiguration),
            generateLlmsSmallFile(pageDataList, config, distDirectory, astroConfiguration),
            generateLlmsFullFile(pageDataList, config, distDirectory),
          ];

          if (config.i18n) {
            // A Map (not a plain object) so that a language code colliding with an
            // Object.prototype member such as "constructor" cannot break grouping.
            const pagesByLanguage = new Map<string, PageData[]>();
            for (const page of pageDataList) {
              // 'und' (undetermined) collects pages with no detected language.
              const lang = page.lang || "und";
              const bucket = pagesByLanguage.get(lang);
              if (bucket) {
                bucket.push(page);
              } else {
                pagesByLanguage.set(lang, [page]);
              }
            }

            // Every group, including 'und', gets files suffixed with its language
            // code; only the main trio above uses the unsuffixed names.
            for (const [lang, languagePages] of pagesByLanguage) {
              filePromises.push(
                generateLlmsIndexFile(languagePages, config, distDirectory, astroConfiguration, lang),
                generateLlmsSmallFile(languagePages, config, distDirectory, astroConfiguration, lang),
                generateLlmsFullFile(languagePages, config, distDirectory, lang)
              );
            }
          }

          await Promise.all(filePromises);
          logger.info(
            config.i18n
              ? "✅ Generated llms.txt, llms-small.txt, llms-full.txt and language-specific files"
              : "✅ Generated llms.txt, llms-small.txt, and llms-full.txt"
          );

          logger.info("Available in build output dir");
        } catch (error) {
          logger.error(`Failed to generate LLMs files: ${error}`);
        }
      },
    },
  };
}

/**
 * Resolve the effective configuration by layering user options over defaults.
 *
 * The result is memoised in `configurationCache` under a key derived from the
 * site URL and the user config, so repeated calls with the same inputs return
 * the same object without re-reading package.json.
 *
 * @param astroConfig   Astro configuration; only `site` is read.
 * @param userConfig    Options passed to the integration.
 * @param distDirectory Build output directory (currently unused here).
 * @returns A configuration object with every field populated.
 */
async function generateSmartDefaults(
  astroConfig: AstroConfig,
  userConfig: LlmsConfig,
  distDirectory: string
): Promise<Required<LlmsConfig>> {
  const cacheKey = createCacheKey(astroConfig, userConfig);

  const cachedConfig = configurationCache.get(cacheKey);
  if (cachedConfig !== undefined) {
    return cachedConfig;
  }

  const packageDescription = await extractPackageDescription();
  const fallbackTitle = generateTitleFromSite(astroConfig.site);

  // Empty strings count as "not provided" for the text options, hence `||`.
  const title = userConfig.title || fallbackTitle;
  const description =
    userConfig.description ||
    packageDescription ||
    `AI-friendly documentation for ${fallbackTitle}`;

  const resolved: Required<LlmsConfig> = {
    title,
    description,
    includePatterns: userConfig.includePatterns || ["**/*"],
    excludePatterns: userConfig.excludePatterns || ["**/404*", "**/500*", "**/api/**"],
    customSeparator: userConfig.customSeparator || "\n\n---\n\n",
    i18n: userConfig.i18n ?? false,
  };

  configurationCache.set(cacheKey, resolved);
  return resolved;
}

/**
 * Turn the build's page list into `PageData` records.
 *
 * Pages are handled five at a time: each group is processed in parallel, and
 * the next group starts only after the previous one has finished. Pages that
 * could not be processed are absent from the result.
 *
 * @param pages         Page descriptors from the build.
 * @param distDirectory Build output directory containing the HTML files.
 * @param astroConfig   Astro configuration, forwarded to the extractor.
 * @returns All successfully processed pages, ordered by pathname.
 */
async function discoverAndProcessPages(
  pages: { pathname: string }[],
  distDirectory: string,
  astroConfig: AstroConfig
): Promise<PageData[]> {
  const pagesPerBatch = 5;
  const collected: PageData[] = [];

  let offset = 0;
  while (offset < pages.length) {
    const chunk = pages.slice(offset, offset + pagesPerBatch);
    const chunkResults = await processBatchOfPages(chunk, distDirectory, astroConfig);
    collected.push(...chunkResults);

    // Ask for a collection between batches when the runtime exposes `gc`.
    if (global.gc) {
      global.gc();
    }

    offset += pagesPerBatch;
  }

  return sortPagesByPathname(collected);
}

/**
 * Process one group of pages concurrently.
 *
 * For each page the expected HTML file is located and checked for existence,
 * then parsed. A page whose file is missing or whose extraction fails is
 * reported with a console warning and left out of the result.
 *
 * @param pageBatch     Pages to process together.
 * @param distDirectory Build output directory containing the HTML files.
 * @param astroConfig   Astro configuration, forwarded to the extractor.
 * @returns Data for the pages that were processed successfully.
 */
async function processBatchOfPages(
  pageBatch: { pathname: string }[],
  distDirectory: string,
  astroConfig: AstroConfig
): Promise<PageData[]> {
  const loadPage = async (page: { pathname: string }): Promise<PageData | null> => {
    try {
      const htmlFilePath = getHtmlFilePath(page.pathname, distDirectory);
      await fs.access(htmlFilePath);

      return await extractPageDataFromHtml(htmlFilePath, page.pathname, astroConfig);
    } catch {
      console.warn(`⚠️ Could not process page: ${page.pathname}`);
      return null;
    }
  };

  const outcomes = await Promise.all(pageBatch.map((page) => loadPage(page)));

  const usablePages: PageData[] = [];
  for (const outcome of outcomes) {
    if (outcome !== null) {
      usablePages.push(outcome);
    }
  }
  return usablePages;
}

/**
 * Read a built HTML file and extract the data needed for the llms files.
 *
 * The title is read before the main content because content extraction
 * removes the first `<h1>` from the parsed document.
 *
 * @param htmlFilePath Absolute path of the HTML file to parse.
 * @param pathname     Page pathname; stored as both `pathname` and `slug`.
 * @param astroConfig  Astro configuration (currently unused here).
 * @returns The extracted page data.
 * @throws Error wrapping any read, parse or extraction failure, prefixed with the file path.
 */
async function extractPageDataFromHtml(
  htmlFilePath: string, 
  pathname: string, 
  astroConfig: AstroConfig
): Promise<PageData> {
  let documentModel: JSDOM | undefined;

  try {
    const htmlContent = await fs.readFile(htmlFilePath, "utf-8");
    documentModel = new JSDOM(htmlContent);
    const document = documentModel.window.document;

    const extractedTitle = extractTitleFromDocument(document, pathname);
    const metaDescription = extractMetaDescription(document);
    const mainContent = await extractMainContentAsMarkdown(document);
    const lang = extractLanguageFromDocument(document);

    return {
      pathname,
      title: extractedTitle,
      description: metaDescription,
      content: mainContent.trim(),
      slug: pathname,
      lang: lang
    };
  } catch (error) {
    throw new Error(`Failed to extract page data from ${htmlFilePath}: ${error}`);
  } finally {
    // Release the JSDOM window on every path, including when extraction throws;
    // previously a failing step left the window open.
    documentModel?.window.close();
  }
}

/**
 * Write the index file (page links with descriptions) into the build directory.
 *
 * @param pages         Pages to list.
 * @param config        Resolved configuration.
 * @param distDirectory Build output directory.
 * @param astroConfig   Astro configuration; `site` is used as the base URL for links.
 * @param lang          Optional language code; when given the file is named `llms-<lang>.txt`, otherwise `llms.txt`.
 */
async function generateLlmsIndexFile(
  pages: PageData[],
  config: Required<LlmsConfig>,
  distDirectory: string,
  astroConfig: AstroConfig,
  lang?: string
): Promise<void> {
  const outputName = lang ? `llms-${lang}.txt` : "llms.txt";
  const outputPath = path.join(distDirectory, outputName);
  const body = createIndexFileContent(pages, config, astroConfig.site || "");
  await fs.writeFile(outputPath, body, "utf-8");
}

/**
 * Write the structure-only file (page links without descriptions) into the build directory.
 *
 * @param pages         Pages to list.
 * @param config        Resolved configuration.
 * @param distDirectory Build output directory.
 * @param astroConfig   Astro configuration; `site` is used as the base URL for links.
 * @param lang          Optional language code; when given the file is named `llms-small-<lang>.txt`, otherwise `llms-small.txt`.
 */
async function generateLlmsSmallFile(
  pages: PageData[],
  config: Required<LlmsConfig>,
  distDirectory: string,
  astroConfig: AstroConfig,
  lang?: string
): Promise<void> {
  const outputName = lang ? `llms-small-${lang}.txt` : "llms-small.txt";
  const outputPath = path.join(distDirectory, outputName);
  const body = createSmallFileContent(pages, config, astroConfig.site || "");
  await fs.writeFile(outputPath, body, "utf-8");
}

/**
 * Write the full-content file (every page's Markdown body) into the build directory.
 *
 * @param pages         Pages whose content is concatenated.
 * @param config        Resolved configuration.
 * @param distDirectory Build output directory.
 * @param lang          Optional language code; when given the file is named `llms-full-<lang>.txt`, otherwise `llms-full.txt`.
 */
async function generateLlmsFullFile(
  pages: PageData[],
  config: Required<LlmsConfig>,
  distDirectory: string,
  lang?: string
): Promise<void> {
  const outputName = lang ? `llms-full-${lang}.txt` : "llms-full.txt";
  const outputPath = path.join(distDirectory, outputName);
  const body = createFullFileContent(pages, config);
  await fs.writeFile(outputPath, body, "utf-8");
}

// ====== UTILITY FUNCTIONS ======

/**
 * Build the text of the index file.
 *
 * Layout: title heading, description blockquote, a "Pages" section with one
 * bullet per page grouped by parent directory (root-level pages carry no
 * sub-heading), and a closing note.
 *
 * @param pages   Pages to list.
 * @param config  Resolved configuration supplying title and description.
 * @param baseUrl Site URL used to make links absolute; when empty the raw pathname is used.
 * @returns The file content with surrounding whitespace trimmed.
 */
function createIndexFileContent(pages: PageData[], config: Required<LlmsConfig>, baseUrl: string): string {
  const toBullet = (page: PageData): string => {
    const href = baseUrl ? new URL(page.pathname, baseUrl).toString() : page.pathname;
    const suffix = page.description ? ` - ${page.description}` : "";
    return `- [${page.title}](${href})${suffix}`;
  };

  const sections = Object.entries(groupPagesByDirectory(pages)).flatMap(
    ([directoryName, directoryPages]) => {
      const heading = directoryName === "/" ? [] : [`### ${directoryName}`, ""];
      // Each group is followed by one blank line.
      return [...heading, ...directoryPages.map(toBullet), ""];
    }
  );

  const document = [
    `# ${config.title}`,
    `> ${config.description}`,
    "",
    "## Pages",
    "",
    ...sections,
    "",
    "*Auto-generated documentation index*",
  ];

  return document.join("\n").trim();
}

/**
 * Build the text of the structure-only file: a heading, a fixed subtitle and
 * one link bullet per page in the order given.
 *
 * @param pages   Pages to list.
 * @param config  Resolved configuration supplying the title.
 * @param baseUrl Site URL used to make links absolute; when empty the raw pathname is used.
 * @returns The file content with surrounding whitespace trimmed.
 */
function createSmallFileContent(pages: PageData[], config: Required<LlmsConfig>, baseUrl: string): string {
  const toBullet = (page: PageData): string => {
    const href = baseUrl ? new URL(page.pathname, baseUrl).toString() : page.pathname;
    return `- [${page.title}](${href})`;
  };

  const header = [`# ${config.title}`, "> Structure-only documentation", ""];

  return [...header, ...pages.map(toBullet)].join("\n").trim();
}

/**
 * Build the text of the full-content file.
 *
 * After a fixed preamble (title, description, notice) every page that has
 * non-empty content is rendered as a heading, an optional description
 * blockquote and its body; pages are joined with `config.customSeparator`.
 *
 * @param pages  Pages to include; those without content are skipped.
 * @param config Resolved configuration supplying title, description and separator.
 * @returns The file content with surrounding whitespace trimmed.
 */
function createFullFileContent(pages: PageData[], config: Required<LlmsConfig>): string {
  const renderedPages: string[] = [];

  for (const { title, description, content } of pages) {
    if (!content) {
      continue;
    }
    const heading = description ? `# ${title}\n> ${description}` : `# ${title}`;
    renderedPages.push(`${heading}\n\n${content}`);
  }

  const preamble =
    `# ${config.title}\n` +
    `> ${config.description}\n` +
    "\n" +
    "*Complete documentation content below*\n" +
    "\n";

  return (preamble + renderedPages.join(config.customSeparator)).trim();
}

/**
 * Bucket pages by the name of their immediate parent directory.
 *
 * Pages that sit at the root (dirname "/" or ".") are grouped under "/".
 * For deeper pages only the last segment of the parent path is used as the
 * key, so `/guides/setup/a` and `/other/setup/b` share the "setup" group.
 * Groups appear in first-seen order and keep the input order of their pages.
 *
 * @param pages Pages to group.
 * @returns A prototype-less dictionary mapping group name to its pages; iterate it with `Object.entries`/`Object.keys`.
 */
function groupPagesByDirectory(pages: PageData[]): Record<string, PageData[]> {
  // Null-prototype dictionary: with a plain `{}` a directory called "constructor"
  // or "__proto__" resolves to an inherited member, which is truthy but has no
  // `push`, and grouping crashed with a TypeError.
  const groups: Record<string, PageData[]> = Object.create(null);

  for (const page of pages) {
    const directoryPath = path.dirname(page.pathname);
    const directoryName =
      directoryPath === "/" || directoryPath === "."
        ? "/"
        : directoryPath.split("/").filter(Boolean).pop() || "/";

    const existingGroup = groups[directoryName];
    if (existingGroup) {
      existingGroup.push(page);
    } else {
      groups[directoryName] = [page];
    }
  }

  return groups;
}

/**
 * Pick a title for a page.
 *
 * Candidates are tried in order and the first non-empty one wins: trimmed text
 * of the first `<h1>`, trimmed text of `<title>`, the last non-empty segment
 * of the pathname, and finally the literal "Untitled".
 *
 * @param document Parsed page document.
 * @param pathname Page pathname used as a fallback source.
 * @returns The chosen title.
 */
function extractTitleFromDocument(document: Document, pathname: string): string {
  const headingText = document.querySelector("h1")?.textContent?.trim();
  const titleTagText = document.querySelector("title")?.textContent?.trim();
  const lastSegment = pathname.split("/").filter(Boolean).pop();

  for (const candidate of [headingText, titleTagText, lastSegment]) {
    if (candidate) {
      return candidate;
    }
  }
  return "Untitled";
}

/**
 * Read the page's meta description.
 *
 * @param document Parsed page document.
 * @returns The trimmed `content` of `<meta name="description">`, or `undefined`
 *          when the tag or its `content` attribute is absent.
 */
function extractMetaDescription(document: Document): string | undefined {
  const metaTag = document.querySelector('meta[name="description"]');
  const rawContent = metaTag?.getAttribute("content");

  if (rawContent == null) {
    return undefined;
  }
  return rawContent.trim();
}

/**
 * Detect the page language.
 *
 * The `lang` attribute of the root element is consulted first; when it is
 * missing or unusable, `<meta http-equiv="content-language">` is tried.
 * Only the primary language code is returned (e.g. "en" for "en-US").
 *
 * @param document Parsed page document.
 * @returns Lower-cased primary language code, or `undefined` when none can be determined.
 */
function extractLanguageFromDocument(document: Document): string | undefined {
  const htmlLang = normalizeLanguageTag(document.documentElement.getAttribute("lang"));
  if (htmlLang) {
    return htmlLang;
  }

  const metaContent = document
    .querySelector('meta[http-equiv="content-language"]')
    ?.getAttribute("content");

  return normalizeLanguageTag(metaContent);
}

/**
 * Reduce a raw language attribute value to a lower-cased primary language code.
 *
 * Handles surrounding whitespace, both "-" and "_" as subtag separators, and
 * comma-separated lists (the first entry is used). The result feeds output
 * file names, so anything other than 1-8 ASCII letters is rejected.
 */
function normalizeLanguageTag(rawTag: string | null | undefined): string | undefined {
  if (!rawTag) {
    return undefined;
  }

  // A content-language value may list several languages: "de, en".
  const [firstTag = ""] = rawTag.split(",");
  const [primarySubtag = ""] = firstTag.trim().split(/[-_]/);
  const languageCode = primarySubtag.toLowerCase();

  return /^[a-z]{1,8}$/.test(languageCode) ? languageCode : undefined;
}

/**
 * Convert the page's primary content to Markdown.
 *
 * Uses `<main>` when present, otherwise `<body>`. The first `<h1>` inside that
 * container is removed from the document (the title is emitted separately),
 * then the container's inner HTML is handed to `SimpleMarkdown` together with
 * a list of selectors for header, footer, nav, `.no-llms`, script and style.
 *
 * @param document Parsed page document; it is mutated by the `<h1>` removal.
 * @returns The Markdown text, or an empty string when no container exists.
 */
async function extractMainContentAsMarkdown(document: Document): Promise<string> {
  const container = document.querySelector("main") ?? document.querySelector("body");
  if (container === null) {
    return "";
  }

  // Drop the leading heading so the title is not repeated in the body.
  container.querySelector("h1")?.remove();

  const ignoredSelectors = ["header", "footer", "nav", ".no-llms", "script", "style"];
  const html = container.innerHTML.trim();

  return await SimpleMarkdown(html, ignoredSelectors, false);
}

/**
 * Map a page pathname to the HTML file expected in the build output.
 *
 * A pathname that does not end in "/" and contains a "." resolves to
 * `<pathname>.html`; every other pathname resolves to `<pathname>/index.html`.
 *
 * @param pathname      Page pathname.
 * @param distDirectory Build output directory.
 * @returns Path of the HTML file inside `distDirectory`.
 */
function getHtmlFilePath(pathname: string, distDirectory: string): string {
  const usesFlatFile = !pathname.endsWith("/") && pathname.includes(".");

  return usesFlatFile
    ? path.join(distDirectory, `${pathname}.html`)
    : path.join(distDirectory, pathname, "index.html");
}

/**
 * Derive the configuration cache key: a JSON string combining the Astro
 * `site` value with the user-supplied options.
 */
function createCacheKey(astroConfig: AstroConfig, userConfig: LlmsConfig): string {
  const { site } = astroConfig;
  const keyParts = { astroConfig: site, userConfig };
  return JSON.stringify(keyParts);
}

/**
 * Order pages by pathname using `String.prototype.localeCompare`.
 *
 * The input array is left untouched; a sorted copy is returned (the previous
 * implementation sorted the caller's array in place).
 *
 * @param pages Pages to order.
 * @returns A new array containing the same page objects in pathname order.
 */
function sortPagesByPathname(pages: PageData[]): PageData[] {
  return [...pages].sort((a, b) => a.pathname.localeCompare(b.pathname));
}

/**
 * Read the `description` field of the package.json in the current working directory.
 *
 * Best-effort: a missing or unreadable file, invalid JSON, or a `description`
 * that is absent or not a string all yield an empty string.
 *
 * @returns The package description, or "" when none is available.
 */
async function extractPackageDescription(): Promise<string> {
  try {
    const packageFilePath = path.join(process.cwd(), "package.json");
    const packageContent = await fs.readFile(packageFilePath, "utf-8");
    const packageData: unknown = JSON.parse(packageContent);

    if (typeof packageData !== "object" || packageData === null) {
      return "";
    }

    // JSON content is untrusted in shape: only a string honours the return type.
    const description = (packageData as { description?: unknown }).description;
    return typeof description === "string" ? description : "";
  } catch {
    return "";
  }
}

/**
 * Derive a default title from the configured site URL.
 *
 * @param siteUrl Site URL from the Astro config, if any.
 * @returns The URL's hostname without a leading "www.", the raw input when it
 *          is not a parseable URL, or "Documentation" when no URL is given.
 */
function generateTitleFromSite(siteUrl?: string): string {
  if (siteUrl) {
    let hostname: string;
    try {
      hostname = new URL(siteUrl).hostname;
    } catch {
      // Not a valid absolute URL: fall back to whatever was configured.
      return siteUrl;
    }
    return hostname.replace(/^www\./, "");
  }

  return "Documentation";
}
